Checking Species Abundance and Biomass for Previous Works

While investigating some discrepancies among different datasets a handful of corrections were made. This document is intended to see if any of those changes have important implications for some of the species-specific reports that have been done recently.

The two datasets being compared here are the survdat_Nye_allseasons.RData and the more recent NEFSC_BTS_all_seasons_03032021.RData.

# Cleanup functions
source(here("R/01_nefsc_ss_build.R"))

####  Survdat resource Load Data


# 2019 Data used in 2020 paper
load(paste0(nmfs_path, "Survdat_Nye_allseason.RData"))
survdat_nye  <- survdat %>% clean_names()

# Run cleanup
survdat_nye <- survdat_prep(survdat = survdat_nye) %>% 
  mutate(survdat_source = "survdat_nye")




# 2020 data received last august
load(paste0(res_path, "NMFS_trawl/Survdat_Nye_Aug 2020.RData"))
survdat_20 <- clean_names(survdat) 


# Run cleanup
survdat_20 <- survdat_prep(survdat = survdat_20) %>% 
  mutate(survdat_source = "survdat_nye2020")




# Data we just received in 2021 with errors located and corrected
load(paste0(nmfs_path, "NEFSC_BTS_all_seasons_03032021.RData"))
survdat_21 <- survey$survdat %>% clean_names()


# Run cleanup
survdat_21 <- survdat_prep(survdat = survdat_21) %>% 
  mutate(survdat_source = "survdat_2021")

rm(survdat, survey)

####  Load the species list
species_check <- read_csv(here("data/andrew_species/Assesmentfishspecies.csv"), 
                          col_types = cols())
species_check <- species_check %>% 
  clean_names() %>% 
  mutate(svspp = str_pad(svspp, 3, pad = "0", side = "left"),
         comname = tolower(comname),
         species = str_to_title(species)) %>% 
  arrange(svspp)


# Put in a list
source_list <- list("survdat_nye_2019" = survdat_nye,
                    "survdat_nye_2020" = survdat_20,
                    "survdat_2021"     = survdat_21)

Making Comparisons

# Make years comparable
# Filter species down for both

source_list <- map(source_list, function(survdat_data){
  survdat_data %>% 
    filter(est_year <= 2017,
           svspp %in% species_check$svspp)

  })


# Split them each by comname
source_splits <- map(source_list, function(source_data){
  source_split <- source_data %>% split(.$comname)
})




# Make  Comparisons

# Vector of species and their common names, atleast the common names Andrew used
andrew_species <- species_check$comname %>% setNames(species_check$svspp)

# Pull the species
species_comparisons <- imap(andrew_species, function(species_comname, species_svspp){
  
  # there are some name mismatches, so catch those
  in_nye  <- species_comname %in% names(source_splits$survdat_nye_2019)
  in_20   <- species_comname %in% names(source_splits$survdat_nye_2020)
  in_21   <- species_comname %in% names(source_splits$survdat_2021)
  in_both <- in_nye & in_21
  in_all  <- in_nye & in_20 & in_21
  
  if(in_both == FALSE){
    return(list("in_nye"  = in_nye, 
                "in_21"   = in_21,
                "in_all"  = in_all,
                "data"    = data.frame(),
                "metrics" = data.frame()))
  }
  
  
  
  # Pull just that species from the 2019 data
  summ_19 <- source_list$survdat_nye_2019 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_19 = sum(abundance, na.rm = T),
              biom_19  = sum(biom_adj, na.rm = T),
              .groups  = "keep") %>% ungroup()
  
  # same for 2020 data
  summ_20 <- source_list$survdat_nye_2020 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_20 = sum(abundance, na.rm = T),
              biom_20  = sum(biom_adj, na.rm = T),
              .groups = "keep") %>% ungroup()
  
  
   # and 2021 data
  summ_21 <- source_list$survdat_2021 %>% 
    filter(svspp == species_svspp) %>% 
    group_by(comname, est_year) %>% 
    summarise(abund_21 = sum(abundance, na.rm = T),
              biom_21  = sum(biom_adj, na.rm = T),
              .groups = "keep") %>% ungroup()
  
  # join them for side-by-side data
  combined_data <- left_join(summ_19, summ_20, by = c("comname", "est_year")) %>% 
    left_join(summ_21, by = c("comname", "est_year")) %>% 
    mutate(abund_change_19to21 = ((abund_21 - abund_19) / abund_19) * 100,
           biom_change_19to21  = ((biom_21 - biom_19) / biom_19) * 100,
           abund_change_20to21 = ((abund_21 - abund_20) / abund_20) * 100,
           biom_change_20to21  = ((biom_21 - biom_20) / biom_20) * 100) 
  
  #return(combined_data)
  
  # abunndance correlation
  abund_cor_19to21   <- cor(combined_data$abund_21, 
                            combined_data$abund_19, 
                            use = "pairwise.complete.obs")
  abund_cor_20to21   <- cor(combined_data$abund_21, 
                            combined_data$abund_20, 
                            use = "pairwise.complete.obs")
  
  # biomass correlation
  biomass_cor_19to21 <- cor(combined_data$biom_21, 
                            combined_data$biom_19,   
                            use = "pairwise.complete.obs")
  biomass_cor_20to21 <- cor(combined_data$biom_21, 
                            combined_data$biom_20,   
                            use = "pairwise.complete.obs")
  
  # average relative shift
  abund_shift_19to21 <- mean(combined_data$abund_change_19to21, na.rm = T)
  biom_shift_19to21  <- mean(combined_data$biom_change_19to21, na.rm = T)
  abund_shift_20to21 <- mean(combined_data$abund_change_20to21, na.rm = T)
  biom_shift_20to21  <- mean(combined_data$biom_change_20to21, na.rm = T)
  
  # put in list to export
  list("data" = combined_data,
       "metrics" = data.frame(
         "comname"            = species_comname,
         "svspp"              = species_svspp,
         "abund_corr_19to21"  = abund_cor_19to21,
         "abund_corr_20to21"  = abund_cor_20to21,
         "biom_corr_19to21"   = biomass_cor_19to21,
         "biom_corr_20to21"   = biomass_cor_20to21,
         "perc_abund_19to21"  = abund_shift_19to21,
         "perc_abund_20to21"  = abund_shift_20to21,
         "perc_biom_19to21"   = biom_shift_19to21,
         "perc_biom_20to21"   = biom_shift_20to21)
       )
  })


# put data and metrics into a table
comparison_data    <- map_dfr(species_comparisons, ~.x[["data"]]) 
comparison_metrics <- map_dfr(species_comparisons, ~.x[["metrics"]]) 

Comparing 2019 to 2021

Abundance

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abund_corr_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = abund_corr_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Abundances")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_abund_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_abund_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    scale_x_continuous(labels = scales::percent_format()) +
    labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(abund_corr_19to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# filter species
corr_sub <- comparison_data %>% 
  filter(comname %in% corr_species) 

# how many species per plot
p1 <- corr_species[1:6]
p2 <- corr_species[-c(1:6)]

# p1
corr_sub %>% 
  filter(comname %in% p1) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 1")

# p1
corr_sub %>% 
  filter(comname %in% p2) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 2")

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_abund_19to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance",
       title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Biomass

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biom_corr_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = biom_corr_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Biomasses")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_biom_19to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_biom_19to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(biom_corr_19to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# plot
if(length(corr_species) > 0) {
comparison_data %>% 
  filter(comname %in% corr_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass", 
       title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
       color = "Survdat Source")
}

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_biom_19to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_19, color = "Survdat Nye Allseason 2019")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass",
       title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Comparing 2020 to 2021

Abundance

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, abund_corr_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = abund_corr_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Abundances")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_abund_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_abund_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    scale_x_continuous(labels = scales::percent_format()) +
    labs(y = "", x = "Avg. Percent Change in Annual Abundance\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(abund_corr_20to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# filter species
corr_sub <- comparison_data %>% 
  filter(comname %in% corr_species) 

# how many species per plot
p1 <- corr_species[1:6]
#p2 <- corr_species[-c(1:6)]

# p1
corr_sub %>% 
  filter(comname %in% p1) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance", 
       title = paste0("Species with Annual Abundance Correlation <= ", corr_cutoff),
       color = "Survdat Source",
       subtitle = "Subset 1")

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_abund_20to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, abund_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, abund_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Abundance",
       title = paste0("Species with Avg. Shift in Abundance >= ", perc_cutoff, "%"),
       color = "Survdat Source")

Biomass

Correlation

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, biom_corr_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = biom_corr_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Correlation Between Total Annual Biomasses")

Annual Percent Change

comparison_metrics %>% 
  mutate(comname = fct_reorder(comname, perc_biom_20to21, max, .desc = F)) %>% 
  ggplot(aes(x = perc_biom_20to21, y = comname)) +
    geom_segment(aes(xend = 0, yend = comname), color = gmri_cols("gmri blue")) +
    geom_point(color = gmri_cols("gmri blue")) +
    labs(y = "", x = "Avg. Percent Change in Annual Biomasses\nMoving from Old Data -> New Data")

Problem Species

Poor Correlation

# correlation
corr_cutoff <- 0.8
corr_species <- comparison_metrics %>% 
  filter(biom_corr_20to21 <= corr_cutoff) %>% 
   mutate(comname = fct_drop(comname)) %>% 
  pull(comname)

# plot
if(length(corr_species) > 0){
comparison_data %>% 
  filter(comname %in% corr_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free" ) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass", 
       title = paste0("Species with Annual Biomass Correlation <= ", corr_cutoff),
       color = "Survdat Source")
}

Large Percent Change

# percent change
perc_cutoff <- 25
perc_species <- comparison_metrics %>% 
  filter(perc_biom_20to21 >= perc_cutoff) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  pull(comname) 


comparison_data %>% 
  filter(comname %in% perc_species) %>% 
  mutate(comname = fct_drop(comname)) %>% 
  ggplot() +
  geom_line(aes(est_year, biom_20, color = "Survdat Nye - August 2020")) +
  geom_line(aes(est_year, biom_21, color = "Newest Survdat")) +
  facet_wrap(~comname, ncol = 1, scales = "free") +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_gmri() +
  labs(x = "", 
       y = "Annual Biomass",
       title = paste0("Species with Avg. Shift in Biomass >= ", perc_cutoff, "%"),
       color = "Survdat Source")

 

A work by Adam A. Kemberling

Akemberling@gmri.org